/*-------------------------------------------------------------------------
 *
 * slab.c
 *      SLAB allocator definitions.
 *
 * SLAB is a MemoryContext implementation designed for cases where large
 * numbers of equally-sized objects are allocated (and freed).
 *
 *
 * Portions Copyright (c) 2017, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *      src/backend/utils/mmgr/slab.c
 *
 *
 * NOTE:
 *    The constant allocation size allows significant simplification and various
 *    optimizations over more general purpose allocators. The blocks are carved
 *    into chunks of exactly the right size (plus alignment), not wasting any
 *    memory.
 *
 *    The information about free chunks is maintained both at the block level and
 *    global (context) level. This is possible as the chunk size (and thus also
 *    the number of chunks per block) is fixed.
 *
 *    On each block, free chunks are tracked in a simple linked list. Contents
 *    of free chunks is replaced with an index of the next free chunk, forming
 *    a very simple linked list. Each block also contains a counter of free
 *    chunks. Combined with the local block-level freelist, it makes it trivial
 *    to eventually free the whole block.
 *
 *    At the context level, we use 'freelist' to track blocks ordered by number
 *    of free chunks, starting with blocks having a single allocated chunk, and
 *    with completely full blocks on the tail.
 *
 *    This also allows various optimizations - for example when searching for
 *    free chunk, the allocator reuses space from the fullest blocks first, in
 *    the hope that some of the less full blocks will get completely empty (and
 *    returned back to the OS).
 *
 *    For each block, we maintain pointer to the first free chunk - this is quite
 *    cheap and allows us to skip all the preceding used chunks, eliminating
 *    a significant number of lookups in many common usage patterns. In the worst
 *    case this performs as if the pointer was not maintained.
 *
 *    We cache the freelist index for the blocks with the fewest free chunks
 *    (minFreeChunks), so that we don't have to search the freelist on every
 *    SlabAlloc() call, which is quite expensive.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "utils/memdebug.h"
#include "utils/memutils.h"
#include "lib/ilist.h"


/*
 * SlabContext is a specialized implementation of MemoryContext, tailored to
 * allocations of a single fixed chunk size (chosen at context creation).
 */
typedef struct SlabContext
{
    MemoryContextData header;    /* Standard memory-context fields */
    /* Allocation parameters for this context: */
    Size        chunkSize;        /* requested (user-visible) chunk size */
    Size        fullChunkSize;    /* chunk size including header and alignment */
    Size        blockSize;        /* size of each malloc'd block */
    int            chunksPerBlock; /* number of chunks that fit in one block */
    int            minFreeChunks;    /* min number of free chunks in any block;
                                 * 0 means "no block has free chunks" (see
                                 * SlabAlloc) */
    int            nblocks;        /* number of blocks currently allocated */
    /*
     * Blocks with free space, grouped by number of free chunks: bucket [i]
     * holds blocks with exactly i free chunks.  There are chunksPerBlock+1
     * buckets (0 .. chunksPerBlock), sized in SlabContextCreate and
     * initialized in SlabInit.
     */
    dlist_head    freelist[FLEXIBLE_ARRAY_MEMBER];
} SlabContext;

/*
 * SlabBlock
 *        Structure of a single block in SLAB allocator.
 *
 * Chunks are stored densely right after this header, fullChunkSize bytes
 * apart (see SlabBlockGetChunk).
 *
 * node: doubly-linked list node, linking the block into the context-level
 *         freelist bucket that matches its nfree count
 * nfree: number of free chunks in this block
 * firstFreeChunk: index of the first free chunk; the per-block freelist is
 *         threaded through the free chunks themselves, and the value
 *         chunksPerBlock serves as the end-of-list marker
 */
typedef struct SlabBlock
{
    dlist_node    node;            /* doubly-linked list */
    int            nfree;            /* number of free chunks */
    int            firstFreeChunk; /* index of the first free chunk in the block */
} SlabBlock;

/*
 * SlabChunk
 *        The prefix of each piece of memory in an SlabBlock
 *
 * The user data follows immediately after this header; the StaticAssertStmt
 * in SlabContextCreate verifies the header ends exactly on a MAXALIGN
 * boundary, so SlabChunkGetPointer/SlabPointerGetChunk can convert between
 * the two addresses by a plain sizeof(SlabChunk) offset.
 */
typedef struct SlabChunk
{
    /* block owning this chunk */
    void       *block;
    SlabContext *slab;            /* owning context */
    /* there must not be any padding to reach a MAXALIGN boundary here! */
} SlabChunk;


/* Convert between a user data pointer and its preceding SlabChunk header. */
#define SlabPointerGetChunk(ptr)    \
    ((SlabChunk *)(((char *)(ptr)) - sizeof(SlabChunk)))
#define SlabChunkGetPointer(chk)    \
    ((void *)(((char *)(chk)) + sizeof(SlabChunk)))
/*
 * Address of the idx'th chunk in a block: chunks are laid out densely right
 * after the SlabBlock header, each fullChunkSize bytes apart.
 */
#define SlabBlockGetChunk(slab, block, idx) \
    ((SlabChunk *) ((char *) (block) + sizeof(SlabBlock)    \
                    + (idx * slab->fullChunkSize)))
/* First chunk byte of a block, just past the SlabBlock header. */
#define SlabBlockStart(block)    \
    ((char *) block + sizeof(SlabBlock))
/* Index of a chunk within its block (inverse of SlabBlockGetChunk). */
#define SlabChunkIndex(slab, block, chunk)    \
    (((char *) chunk - SlabBlockStart(block)) / slab->fullChunkSize)

/*
 * These functions implement the MemoryContext API for Slab contexts.
 */
static void *SlabAlloc(MemoryContext context, Size size);
static void SlabFree(MemoryContext context, void *pointer);
static void *SlabRealloc(MemoryContext context, void *pointer, Size size);
static void SlabInit(MemoryContext context);
static void SlabReset(MemoryContext context);
static void SlabDelete(MemoryContext context);
static Size SlabGetChunkSpace(MemoryContext context, void *pointer);
static bool SlabIsEmpty(MemoryContext context);
static void SlabStats(MemoryContext context, int level, bool print,
          MemoryContextCounters *totals);
#ifdef MEMORY_CONTEXT_CHECKING
static void SlabCheck(MemoryContext context);
#endif

/*
 * This is the virtual function table for Slab contexts.  The entry order
 * must match the field order of MemoryContextMethods.
 */
static MemoryContextMethods SlabMethods = {
    SlabAlloc,
    SlabFree,
    SlabRealloc,
    SlabInit,
    SlabReset,
    SlabDelete,
    SlabGetChunkSpace,
    SlabIsEmpty,
    SlabStats
#ifdef MEMORY_CONTEXT_CHECKING
    ,SlabCheck
#endif
};

/* ----------
 * Debug macros
 * ----------
 */
#ifdef HAVE_ALLOCINFO
/*
 * Unlike aset.c's chunks, SlabChunk has no "header.size" member, so the
 * previous definitions referencing (_chunk)->header.size failed to compile
 * whenever HAVE_ALLOCINFO was defined.  All chunks in a slab share the same
 * size anyway, so report the context's fixed chunkSize instead.
 */
#define SlabFreeInfo(_cxt, _chunk) \
            fprintf(stderr, "SlabFree: %s: %p, %zu\n", \
                (_cxt)->header.name, (_chunk), (_cxt)->chunkSize)
#define SlabAllocInfo(_cxt, _chunk) \
            fprintf(stderr, "SlabAlloc: %s: %p, %zu\n", \
                (_cxt)->header.name, (_chunk), (_cxt)->chunkSize)
#else
#define SlabFreeInfo(_cxt, _chunk)
#define SlabAllocInfo(_cxt, _chunk)
#endif


/*
 * SlabContextCreate
 *        Create a new Slab context.
 *
 * parent: parent context, or NULL if top-level context
 * name: name of context (for debugging --- string will be copied)
 * blockSize: allocation block size
 * chunkSize: allocation chunk size
 *
 * The chunkSize may not exceed:
 *        MAXALIGN_DOWN(SIZE_MAX) - MAXALIGN(sizeof(SlabBlock)) - SLAB_CHUNKHDRSZ
 * which guarantees the size arithmetic below cannot overflow.
 */
MemoryContext
SlabContextCreate(MemoryContext parent,
                  const char *name,
                  Size blockSize,
                  Size chunkSize)
{
    int            chunksPerBlock;
    Size        fullChunkSize;
    Size        freelistSize;
    SlabContext *slab;

    /*
     * User data must start right after the chunk header, on a MAXALIGN
     * boundary - SlabChunkGetPointer/SlabPointerGetChunk depend on it.
     */
    StaticAssertStmt(offsetof(SlabChunk, slab) + sizeof(MemoryContext) ==
                     MAXALIGN(sizeof(SlabChunk)),
                     "padding calculation in SlabChunk is wrong");

    /* Make sure the linked list node fits inside a freed chunk */
    if (chunkSize < sizeof(int))
        chunkSize = sizeof(int);

    /* chunk, including SLAB header (both addresses nicely aligned) */
    fullChunkSize = MAXALIGN(sizeof(SlabChunk) + MAXALIGN(chunkSize));

    /*
     * Make sure the block can store at least one chunk.  This is written as
     * an addition rather than "blockSize - sizeof(SlabBlock) < fullChunkSize"
     * because Size is unsigned: a blockSize smaller than sizeof(SlabBlock)
     * would make the subtraction wrap around to a huge value and silently
     * bypass the check.  (The documented chunkSize limit above ensures the
     * addition itself cannot overflow.)
     */
    if (blockSize < sizeof(SlabBlock) + fullChunkSize)
        elog(ERROR, "block size %zu for slab is too small for %zu chunks",
             blockSize, chunkSize);

    /* Compute maximum number of chunks per block */
    chunksPerBlock = (blockSize - sizeof(SlabBlock)) / fullChunkSize;

    /* The freelist starts with 0, ends with chunksPerBlock. */
    freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1);

    /* if we can't fit at least one chunk into the block, we're hosed */
    Assert(chunksPerBlock > 0);

    /* make sure the chunks actually fit on the block    */
    Assert((fullChunkSize * chunksPerBlock) + sizeof(SlabBlock) <= blockSize);

    /* Do the type-independent part of context creation */
    slab = (SlabContext *)
        MemoryContextCreate(T_SlabContext,
                            (offsetof(SlabContext, freelist) + freelistSize),
                            &SlabMethods,
                            parent,
                            name);

    /* Fill in the SlabContext-specific fields */
    slab->blockSize = blockSize;
    slab->chunkSize = chunkSize;
    slab->fullChunkSize = fullChunkSize;
    slab->chunksPerBlock = chunksPerBlock;
    slab->nblocks = 0;
    slab->minFreeChunks = 0;    /* no blocks with free chunks yet */

    return (MemoryContext) slab;
}

/*
 * SlabInit
 *        Context-type-specific initialization routine.
 *
 * Prepares every freelist bucket for use; the buckets run from 0 (full
 * blocks) up to and including chunksPerBlock (completely free blocks).
 */
static void
SlabInit(MemoryContext context)
{
    SlabContext *slab = castNode(SlabContext, context);
    int            idx;

    Assert(slab);

    /* there are (chunksPerBlock + 1) buckets, indexes 0 .. chunksPerBlock */
    for (idx = 0; idx <= slab->chunksPerBlock; idx++)
        dlist_init(&slab->freelist[idx]);
}

/*
 * SlabReset
 *        Frees all memory which is allocated in the given set.
 *
 * A Slab context keeps no "keeper" blocks, so this simply hands every block
 * back to the OS, leaving the context completely empty.
 */
static void
SlabReset(MemoryContext context)
{
    SlabContext *slab = castNode(SlabContext, context);
    int            fidx;

    Assert(slab);

#ifdef MEMORY_CONTEXT_CHECKING
    /* Check for corruption and leaks before freeing */
    SlabCheck(context);
#endif

    /* release every block found in any of the freelist buckets */
    for (fidx = 0; fidx <= slab->chunksPerBlock; fidx++)
    {
        dlist_mutable_iter miter;

        dlist_foreach_modify(miter, &slab->freelist[fidx])
        {
            SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);

            dlist_delete(miter.cur);

#ifdef CLOBBER_FREED_MEMORY
            /* clobber the whole block before handing it back */
            wipe_mem(block, slab->blockSize);
#endif
            free(block);
            slab->nblocks--;
        }
    }

    slab->minFreeChunks = 0;

    Assert(slab->nblocks == 0);
}

/*
 * SlabDelete
 *        Frees all memory which is allocated in the given slab, in preparation
 *        for deletion of the slab. We simply call SlabReset().
 *
 * NOTE(review): presumably the context header itself is released by the
 * generic memory-context machinery after this returns - confirm in mcxt.c.
 */
static void
SlabDelete(MemoryContext context)
{
    /* just reset the context */
    SlabReset(context);
}

/*
 * SlabAlloc
 *        Returns pointer to allocated memory of given size or NULL if
 *        request could not be completed; memory is added to the slab.
 *
 * Only requests of exactly chunkSize bytes are accepted; any other size
 * raises ERROR, as Slab is strictly a fixed-size allocator.
 */
static void *
SlabAlloc(MemoryContext context, Size size)
{// #lizard forgives
    SlabContext *slab = castNode(SlabContext, context);
    SlabBlock  *block;
    SlabChunk  *chunk;
    int            idx;

    Assert(slab);

    /* invariant: minFreeChunks == 0 means "no block has free chunks" */
    Assert((slab->minFreeChunks >= 0) &&
           (slab->minFreeChunks < slab->chunksPerBlock));

    /* make sure we only allow correct request size */
    if (size != slab->chunkSize)
        elog(ERROR, "unexpected alloc chunk size %zu (expected %zu)",
             size, slab->chunkSize);

    /*
     * If there are no free chunks in any existing block, create a new block
     * and put it to the last freelist bucket.
     *
     * slab->minFreeChunks == 0 means there are no blocks with free chunks,
     * thanks to how minFreeChunks is updated at the end of SlabAlloc().
     */
    if (slab->minFreeChunks == 0)
    {
        block = (SlabBlock *) malloc(slab->blockSize);

        /* out of memory - let the caller decide how to report it */
        if (block == NULL)
            return NULL;

        block->nfree = slab->chunksPerBlock;
        block->firstFreeChunk = 0;

        /*
         * Put all the chunks on a freelist. Walk the chunks and point each
         * one to the next one.  The next-free index lives in the (unused)
         * user data area of each free chunk; chunksPerBlock terminates the
         * list.
         */
        for (idx = 0; idx < slab->chunksPerBlock; idx++)
        {
            chunk = SlabBlockGetChunk(slab, block, idx);
            *(int32 *) SlabChunkGetPointer(chunk) = (idx + 1);
        }

        /*
         * And add it to the last freelist with all chunks empty.
         *
         * We know there are no blocks in the freelist, otherwise we wouldn't
         * need a new block.
         */
        Assert(dlist_is_empty(&slab->freelist[slab->chunksPerBlock]));

        dlist_push_head(&slab->freelist[slab->chunksPerBlock], &block->node);

        slab->minFreeChunks = slab->chunksPerBlock;
        slab->nblocks += 1;
    }

    /* grab the block from the freelist (even the new block is there) */
    block = dlist_head_element(SlabBlock, node,
                               &slab->freelist[slab->minFreeChunks]);

    /* make sure we actually got a valid block, with matching nfree */
    Assert(block != NULL);
    Assert(slab->minFreeChunks == block->nfree);
    Assert(block->nfree > 0);

    /* we know index of the first free chunk in the block */
    idx = block->firstFreeChunk;

    /* make sure the chunk index is valid, and that it's marked as empty */
    Assert((idx >= 0) && (idx < slab->chunksPerBlock));

    /* compute the chunk location block start (after the block header) */
    chunk = SlabBlockGetChunk(slab, block, idx);

    /*
     * Update the block nfree count, and also the minFreeChunks as we've
     * decreased nfree for a block with the minimum number of free chunks
     * (because that's how we chose the block).
     */
    block->nfree--;
    slab->minFreeChunks = block->nfree;

    /*
     * Remove the chunk from the freelist head. The index of the next free
     * chunk is stored in the chunk itself.
     */
    VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(int32));
    block->firstFreeChunk = *(int32 *) SlabChunkGetPointer(chunk);

    Assert(block->firstFreeChunk >= 0);
    Assert(block->firstFreeChunk <= slab->chunksPerBlock);

    /*
     * Either the block still has free chunks and a valid next-free index, or
     * it is full and the index is the end-of-list marker (chunksPerBlock).
     */
    Assert((block->nfree != 0 &&
            block->firstFreeChunk < slab->chunksPerBlock) ||
           (block->nfree == 0 &&
            block->firstFreeChunk == slab->chunksPerBlock));

    /* move the whole block to the right place in the freelist */
    dlist_delete(&block->node);
    dlist_push_head(&slab->freelist[block->nfree], &block->node);

    /*
     * And finally update minFreeChunks, i.e. the index to the block with the
     * lowest number of free chunks. We only need to do that when the block
     * got full (otherwise we know the current block is the right one). We'll
     * simply walk the freelist until we find a non-empty entry.
     */
    if (slab->minFreeChunks == 0)
    {
        for (idx = 1; idx <= slab->chunksPerBlock; idx++)
        {
            if (dlist_is_empty(&slab->freelist[idx]))
                continue;

            /* found a non-empty freelist */
            slab->minFreeChunks = idx;
            break;
        }
    }

    /*
     * NOTE(review): minFreeChunks == chunksPerBlock would mean the emptiest
     * block is entirely free, but SlabFree() returns fully-free blocks to
     * the OS immediately, so this looks purely defensive - confirm.  The
     * reset keeps the "0 == no free chunks" convention asserted above.
     */
    if (slab->minFreeChunks == slab->chunksPerBlock)
        slab->minFreeChunks = 0;

    /* Prepare to initialize the chunk header. */
    VALGRIND_MAKE_MEM_UNDEFINED(chunk, sizeof(SlabChunk));

    chunk->block = (void *) block;
    chunk->slab = slab;

#ifdef MEMORY_CONTEXT_CHECKING
    /* slab mark to catch clobber of "unused" space */
    if (slab->chunkSize < (slab->fullChunkSize - sizeof(SlabChunk)))
    {
        set_sentinel(SlabChunkGetPointer(chunk), size);
        VALGRIND_MAKE_MEM_NOACCESS(((char *) chunk) +
                                   sizeof(SlabChunk) + slab->chunkSize,
                                   slab->fullChunkSize -
                                   (slab->chunkSize + sizeof(SlabChunk)));
    }
#endif
#ifdef RANDOMIZE_ALLOCATED_MEMORY
    /* fill the allocated space with junk */
    randomize_mem((char *) SlabChunkGetPointer(chunk), size);
#endif

    SlabAllocInfo(slab, chunk);
    return SlabChunkGetPointer(chunk);
}

/*
 * SlabFree
 *        Frees allocated memory; memory is removed from the slab.
 *
 * The chunk is returned to its block's freelist; if that makes the block
 * entirely free, the whole block is returned to the OS.
 */
static void
SlabFree(MemoryContext context, void *pointer)
{// #lizard forgives
    int            idx;
    SlabContext *slab = castNode(SlabContext, context);
    SlabChunk  *chunk = SlabPointerGetChunk(pointer);
    SlabBlock  *block = chunk->block;

    SlabFreeInfo(slab, chunk);

#ifdef MEMORY_CONTEXT_CHECKING
    /* Test for someone scribbling on unused space in chunk */
    if (slab->chunkSize < (slab->fullChunkSize - sizeof(SlabChunk)))
        if (!sentinel_ok(pointer, slab->chunkSize))
            elog(WARNING, "detected write past chunk end in %s %p",
                 slab->header.name, chunk);
#endif

    /* compute index of the chunk with respect to block start */
    idx = SlabChunkIndex(slab, block, chunk);

    /* push chunk onto the block's freelist, and update block nfree count */
    *(int32 *) pointer = block->firstFreeChunk;
    block->firstFreeChunk = idx;
    block->nfree++;

    Assert(block->nfree > 0);
    Assert(block->nfree <= slab->chunksPerBlock);

#ifdef CLOBBER_FREED_MEMORY
    /* XXX don't wipe the int32 index, used for block-level freelist */
    wipe_mem((char *) pointer + sizeof(int32),
             slab->chunkSize - sizeof(int32));
#endif

    /* remove the block from a freelist */
    dlist_delete(&block->node);

    /*
     * See if we need to update the minFreeChunks field for the slab - we only
     * need to do that if the block had that number of free chunks
     * before we freed one. In that case, we check if there still are blocks
     * in the original freelist and we either keep the current value (if there
     * still are blocks) or increment it by one (the new block is still the
     * one with minimum free chunks).
     *
     * The one exception is when the block will get completely free - in that
     * case we will free it, so we can't use it for minFreeChunks. It however
     * means there are no more blocks with free chunks.
     */
    if (slab->minFreeChunks == (block->nfree - 1))
    {
        /* Have we removed the last chunk from the freelist? */
        if (dlist_is_empty(&slab->freelist[slab->minFreeChunks]))
        {
            /* but if we made the block entirely free, we'll free it */
            if (block->nfree == slab->chunksPerBlock)
                slab->minFreeChunks = 0;
            else
                slab->minFreeChunks++;
        }
    }

    /* If the block is now completely empty, free it. */
    if (block->nfree == slab->chunksPerBlock)
    {
        free(block);
        slab->nblocks--;
    }
    else
        dlist_push_head(&slab->freelist[block->nfree], &block->node);

    Assert(slab->nblocks >= 0);
}

/*
 * SlabRealloc
 *        Change the allocated size of a chunk.
 *
 * Slab hands out chunks of one fixed size only, so a genuine resize is
 * impossible.  As a courtesy, a request for exactly the current chunk size
 * returns the original pointer unchanged; any other size is an error.
 *
 * Requests with size < chunkSize could in principle be allowed too, but
 * that seems pointless for a fixed-size allocator, and realloc is normally
 * used to grow an allocation anyway.
 */
static void *
SlabRealloc(MemoryContext context, void *pointer, Size size)
{
    SlabContext *slab = castNode(SlabContext, context);

    Assert(slab);

    /* only a no-op "resize" to the identical size is supported */
    if (size != slab->chunkSize)
        elog(ERROR, "slab allocator does not support realloc()");

    return pointer;
}

/*
 * SlabGetChunkSpace
 *        Given a currently-allocated chunk, determine the total space
 *        it occupies (including all memory-allocation overhead).
 */
static Size
SlabGetChunkSpace(MemoryContext context, void *pointer)
{
    SlabContext *slab = castNode(SlabContext, context);

    Assert(slab != NULL);

    /* every chunk in a slab occupies exactly fullChunkSize bytes */
    return slab->fullChunkSize;
}

/*
 * SlabIsEmpty
 *        Is an Slab empty of any allocated space?
 */
static bool
SlabIsEmpty(MemoryContext context)
{
    SlabContext *slab = castNode(SlabContext, context);

    Assert(slab != NULL);

    /* no blocks at all means nothing is allocated */
    return slab->nblocks == 0;
}

/*
 * SlabStats
 *        Compute stats about memory consumption of an Slab.
 *
 * level: recursion level (0 at top level); used for print indentation.
 * print: true to print stats to stderr.
 * totals: if not NULL, add stats about this Slab into *totals.
 */
static void
SlabStats(MemoryContext context, int level, bool print,
          MemoryContextCounters *totals)
{
    SlabContext *slab = castNode(SlabContext, context);
    Size        nblocks = 0;
    Size        freechunks = 0;
    Size        totalspace = 0;
    Size        freespace = 0;
    int            i;

    Assert(slab);

    /* walk every freelist bucket and accumulate per-block numbers */
    for (i = 0; i <= slab->chunksPerBlock; i++)
    {
        dlist_iter    iter;

        dlist_foreach(iter, &slab->freelist[i])
        {
            SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);

            nblocks++;
            totalspace += slab->blockSize;
            freespace += slab->fullChunkSize * block->nfree;
            freechunks += block->nfree;
        }
    }

    if (print)
    {
        for (i = 0; i < level; i++)
            fprintf(stderr, "  ");

        /*
         * All four counters are Size (i.e. unsigned size_t), so they must
         * be printed with %zu; the previous %zd specifiers for nblocks and
         * freechunks were a signed/unsigned format mismatch (undefined
         * behavior per C99 7.19.6.1).
         */
        fprintf(stderr,
                "Slab: %s: %zu total in %zu blocks; %zu free (%zu chunks); %zu used\n",
                slab->header.name, totalspace, nblocks, freespace, freechunks,
                totalspace - freespace);
    }

    if (totals)
    {
        totals->nblocks += nblocks;
        totals->freechunks += freechunks;
        totals->totalspace += totalspace;
        totals->freespace += freespace;
    }
}


#ifdef MEMORY_CONTEXT_CHECKING

/*
 * SlabCheck
 *        Walk through chunks and check consistency of memory.
 *
 * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
 * find yourself in an infinite loop when trouble occurs, because this
 * routine will be entered again when elog cleanup tries to release memory!
 */
static void
SlabCheck(MemoryContext context)
{// #lizard forgives
    int            i;
    SlabContext *slab = castNode(SlabContext, context);
    char       *name = slab->header.name;
    char       *freechunks;

    Assert(slab);
    Assert(slab->chunksPerBlock > 0);

    /* bitmap of free chunks on a block (allocated in the current context) */
    freechunks = palloc(slab->chunksPerBlock * sizeof(bool));

    /* walk all the freelists */
    for (i = 0; i <= slab->chunksPerBlock; i++)
    {
        int            j,
                    nfree;
        dlist_iter    iter;

        /* walk all blocks on this freelist */
        dlist_foreach(iter, &slab->freelist[i])
        {
            int            idx;
            SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);

            /*
             * Make sure the number of free chunks (in the block header)
             * matches position in the freelist.
             */
            if (block->nfree != i)
                elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist %d",
                     name, block->nfree, block, i);

            /* reset the bitmap of free chunks for this block */
            memset(freechunks, 0, (slab->chunksPerBlock * sizeof(bool)));
            idx = block->firstFreeChunk;

            /*
             * Now walk through the chunks, count the free ones and also
             * perform some additional checks for the used ones. As the chunk
             * freelist is stored within the chunks themselves, we have to
             * walk through the chunks and construct our own bitmap.
             */
            nfree = 0;
            while (idx < slab->chunksPerBlock)
            {
                SlabChunk  *chunk;

                /* count the chunk as free, add it to the bitmap */
                nfree++;
                freechunks[idx] = true;

                /* read index of the next free chunk */
                chunk = SlabBlockGetChunk(slab, block, idx);
                VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(int32));
                idx = *(int32 *) SlabChunkGetPointer(chunk);
            }

            for (j = 0; j < slab->chunksPerBlock; j++)
            {
                /* a zero bit in the bitmap means the chunk is in use */
                if (!freechunks[j])
                {
                    SlabChunk  *chunk = SlabBlockGetChunk(slab, block, j);

                    /* chunks have both block and slab pointers, so check both */
                    if (chunk->block != block)
                        elog(WARNING, "problem in slab %s: bogus block link in block %p, chunk %p",
                             name, block, chunk);

                    if (chunk->slab != slab)
                        elog(WARNING, "problem in slab %s: bogus slab link in block %p, chunk %p",
                             name, block, chunk);

                    /*
                     * There may be a sentinel byte right after the user data
                     * (thanks to alignment padding).  SlabAlloc writes it at
                     * SlabChunkGetPointer(chunk) + chunkSize, and SlabFree
                     * checks it relative to the user pointer too - so the
                     * check here must use the user pointer as well, not the
                     * chunk header address (which previously made this test
                     * inspect a byte inside the user data).
                     */
                    if (slab->chunkSize < (slab->fullChunkSize - sizeof(SlabChunk)))
                        if (!sentinel_ok(SlabChunkGetPointer(chunk), slab->chunkSize))
                            elog(WARNING, "problem in slab %s: detected write past chunk end in block %p, chunk %p",
                                 name, block, chunk);
                }
            }

            /*
             * Make sure we got the expected number of free chunks (as tracked
             * in the block header).
             */
            if (nfree != block->nfree)
                elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match bitmap %d",
                     name, block->nfree, block, nfree);
        }
    }

    /* release the scratch bitmap (previously leaked on every call) */
    pfree(freechunks);
}

#endif                            /* MEMORY_CONTEXT_CHECKING */
